library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Read the player statistics CSV from its local path.
testBattingFielding <- read.csv(
  file = "D:\\Vishal\\III year\\Data Analytics\\Assignment II\\Player Statistics\\testcareerbattingandfielding.csv"
)

# Keep only the first twelve columns for the analysis, then preview the
# top rows of the result.
df <- testBattingFielding[, seq_len(12)]
head(df)
##                           Name Matches Innings Not_Outs Runs High_Score
## 1       Aakash Shyamlal Chopra      10      19        0  437         60
## 2             Abhimanyu Mithun       4       5        0  120         46
## 3               Abhinav Mukund       5      10        0  211         62
## 4 Abraham Benjamin de Villiers     106     176       16 8074        278
## 5           Adam Charles Voges      15      21        7 1337        269
## 6         Adam Craig Gilchrist      96     137       20 5570        204
##   Average No_Of_100 No_Of_50 Strike_Rate Catches_Taken Stumpings
## 1   23.00         0        2       34.60            15        NA
## 2   24.00         0        0       48.19             0        NA
## 3   21.10         0        1       40.73             5        NA
## 4   50.46        21       39       53.75           197         5
## 5   95.50         5        4       59.36            11        NA
## 6   47.60        17       26       81.98           379        37
# Discard every row that has a missing value in any retained column, then
# preview what is left.
df <- stats::na.omit(df)
utils::head(df)
##                            Name Matches Innings Not_Outs Runs High_Score
## 4  Abraham Benjamin de Villiers     106     176       16 8074        278
## 6          Adam Craig Gilchrist      96     137       20 5570        204
## 24         Bradley James Haddin      66     112       13 3265        169
## 26      Brendon Barrie McCullum     101     176        9 6453        302
## 83                 Kamran Akmal      53      92        6 2648        158
## 90  Krishankumar Dinesh Karthik      23      37        1 1000        129
##    Average No_Of_100 No_Of_50 Strike_Rate Catches_Taken Stumpings
## 4    50.46        21       39       53.75           197         5
## 6    47.60        17       26       81.98           379        37
## 24   32.97         4       18       58.44           262         8
## 26   38.64        12       31       64.60           198        11
## 83   30.79         6       12       63.10           184        22
## 90   27.77         1        7       50.00            51         5
# Per-column summary of the rows that remain after the na.omit() step.
summary(df)
##                            Name       Matches          Innings      
##  Abraham Benjamin de Villiers: 1   Min.   :  1.00   Min.   :  2.00  
##  Adam Craig Gilchrist        : 1   1st Qu.: 11.00   1st Qu.: 19.00  
##  Bradley James Haddin        : 1   Median : 28.00   Median : 54.00  
##  Brendon Barrie McCullum     : 1   Mean   : 53.06   Mean   : 86.06  
##  Kamran Akmal                : 1   3rd Qu.: 96.00   3rd Qu.:144.00  
##  Krishankumar Dinesh Karthik : 1   Max.   :146.00   Max.   :233.00  
##  (Other)                     :11                                    
##     Not_Outs           Runs         High_Score       Average     
##  Min.   : 0.000   Min.   :   56   Min.   : 35.0   Min.   :21.58  
##  1st Qu.: 2.000   1st Qu.:  407   1st Qu.: 92.0   1st Qu.:30.31  
##  Median : 6.000   Median : 1546   Median :129.0   Median :33.80  
##  Mean   : 8.294   Mean   : 3172   Mean   :154.2   Mean   :36.08  
##  3rd Qu.:16.000   3rd Qu.: 5498   3rd Qu.:204.0   3rd Qu.:38.64  
##  Max.   :24.000   Max.   :12400   Max.   :319.0   Max.   :57.40  
##                                                                  
##    No_Of_100         No_Of_50      Strike_Rate    Catches_Taken  
##  Min.   : 0.000   Min.   : 0.00   Min.   :39.84   Min.   :  4.0  
##  1st Qu.: 0.000   1st Qu.: 2.00   1st Qu.:47.86   1st Qu.: 31.0  
##  Median : 2.000   Median :12.00   Median :53.75   Median : 57.0  
##  Mean   : 6.706   Mean   :16.47   Mean   :55.41   Mean   :143.8  
##  3rd Qu.: 6.000   3rd Qu.:31.00   3rd Qu.:63.10   3rd Qu.:198.0  
##  Max.   :38.000   Max.   :52.00   Max.   :81.98   Max.   :530.0  
##                                                                  
##    Stumpings    
##  Min.   : 1.00  
##  1st Qu.: 4.00  
##  Median : 5.00  
##  Mean   :11.65  
##  3rd Qu.:20.00  
##  Max.   :38.00  
## 
# Fix the RNG seed so the random initial centres chosen by kmeans() are
# reproducible between runs.
set.seed(20)

# Rows with a recorded stumping count. Missing values must be tested with
# is.na(); comparing against the string 'NA' only drops them as a side effect
# of the comparison itself evaluating to NA.
stumpingRows <- testBattingFielding %>%
  filter(!is.na(Stumpings))

# Clustering features, selected by name rather than by column position.
testStumping <- stumpingRows %>%
  select(Matches, Runs, Stumpings)

# Partition the players into 3 clusters.
# NOTE(review): the features are clustered unscaled, so the column with the
# largest numeric range contributes most to the distances — confirm that this
# is intended.
testStumpingCluster <- kmeans(testStumping, 3)

# Store the cluster labels as a factor so plotly colours them as discrete
# groups instead of a continuous scale.
testStumpingCluster$cluster <- as.factor(testStumpingCluster$cluster)

# Scatter of matches played against stumpings, coloured by cluster. The hover
# names are taken from the same filtered rows that are plotted, so every
# label lines up with its own point.
plot_ly(testStumping, x = ~Matches, y = ~Stumpings, type = "scatter",
        mode = "markers", color = testStumpingCluster$cluster,
        text = ~paste("Name: ", stumpingRows$Name)) %>%
  layout(title = "Clusters of matches & stumpings")